library(tidyverse)
library(janitor) # for data cleaning
Warning: package ‘janitor’ was built under R version 4.0.4
Attaching package: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
library(readxl) # for reading Excel files
library(visdat) # for a quick look at data quality
Warning: package ‘visdat’ was built under R version 4.0.4
# use a light ggplot theme for all plots in this analysis
theme_set(theme_light())
Datasets downloaded from the ONS on 5th June 2021. Both the garden space and public parks datasets are the April 2020 versions.
# Convenience wrapper: substitute `pattern` with `replacement` in every
# column name of `df` (first match per name, via str_replace).
replace_in_column_name <- function(df, pattern, replacement) {
  rename_with(df, .fn = function(nm) str_replace(nm, pattern, replacement))
}
# read in and clean the ONS private-outdoor-space (gardens) data at MSOA scale
gardens <- read_xlsx(
"data/osprivateoutdoorspacereferencetables_edited_for_import.xlsx",
sheet = "MSOA gardens",
skip = 1) %>%
# make variable names more consistent (snake_case)
janitor::clean_names() %>%
# focus on variables of interest
select(country_code:msoa_name,
ends_with("count") |
starts_with("total")) %>%
# simplify variable names (now data by housing type has been removed)
replace_in_column_name("total_", "") %>%
replace_in_column_name("private_outdoor_space", "gar") %>%
replace_in_column_name("address", "ad") %>%
# NOTE(review): the "adress" rule below fires before "adresses", so names
# containing "adresses" become "ades" (e.g. perc_of_ades_with_gar) and the
# "adresses" -> "ads" rule never matches anything. Downstream code relies
# on the "ades" spelling, so do not reorder these two lines without also
# updating every later reference.
replace_in_column_name("adress", "ad") %>%
replace_in_column_name("adresses", "ads") %>%
replace_in_column_name("percentage", "perc") %>%
replace_in_column_name("_m2", "") %>%
replace_in_column_name("average", "ave")
# display data for quick visual checks
gardens
NA
NA
# quick visual summary of column types and the amount of missing data
visdat::vis_dat(gardens)
visdat::vis_miss(gardens)
ONS released the 2019 IMD data at LSOA scale. MySociety have produced IMD at various other scales including the MSOA scale.
# 2019 IMD aggregated to MSOA scale (MySociety release of the ONS LSOA data)
imd_2019 <- read_csv("data/imd2019_msoa_level_data.csv")
-- Column specification ----------------------------------------------------------------------------------------
cols(
MSOAC = col_character(),
MSOAHOCLN = col_character(),
LAD19C = col_character(),
LAD19N = col_character(),
REG = col_character(),
LSOACOUNT = col_double(),
POPMID15 = col_double(),
`IMD19 SCORE` = col_double(),
MSOARANK = col_double(),
MSOADECILE = col_double(),
MSOAQUINTILE = col_double()
)
# display for a quick visual check
imd_2019
ONS provide MSOA level population data. Here I use the most recent release (mid 2019).
# ONS mid-2019 population estimates at MSOA scale;
# keep only the MSOA code and the all-ages population total
msoa_pops <- read_xlsx(
  "data/SAPE22DT4-mid-2019-msoa-syoa-estimates-unformatted.xlsx",
  sheet = "Mid-2019 Persons",
  skip = 4
) %>%
  clean_names() %>%
  select(msoa_code, population = all_ages)

# visual check for missing values
visdat::vis_miss(msoa_pops)
A conservative estimate is the lowest (1.8, mid-rise)
Overall average = 2.4 (2020 see here)
2017-18 was the most recent year I could find with average occupancy figures broken down by house/flat from MHCLG:
House = 2.5 people
High-rise flat = 1.9
Mid-rise flat = 1.8
More details here.
So for now, I’ll make a conservative assumption that the average occupancy for a flat is 1.8 (i.e. the lower of the mid and high rise figures above).
I looked at trying to estimate more accurately, but the figures for the number of dwellings per block are not available.
2020 estimates of the numbers of high and mid rise flat can be found here. These can be used to calculate an average flat occupancy rate.
12,500 blocks of high rise flats
77,500 blocks of mid rise flats
# calculate scaling factors for ave occupancy in an MSOA
# from national data (see notes above)
# NOTE(review): the notes above say a conservative flat occupancy of 1.8
# (mid-rise) would be used, but 1.9 (the high-rise figure) is coded here --
# confirm which is intended
nat_ave_occ <- 2.4
nat_house_ave_occ <- 2.5
nat_flat_ave_occ <- 1.9
# dwelling-type occupancy relative to the national average
flat_scale <- nat_flat_ave_occ / nat_ave_occ
house_scale <- nat_house_ave_occ / nat_ave_occ
# estimate population without a garden, splitting occupancy by dwelling type;
# pop_calc / pop_diff check how well this reconstructs the ONS population
people_wo_gar <- gardens %>%
left_join(msoa_pops) %>%
mutate(house_ad_without_gar_count = houses_ad_count - houses_ad_with_gar_count,
flats_ad_without_gar_count = flats_ad_count - flats_ad_with_gar_count,
ave_occ = population / ad_count,
ave_occ_flat = flat_scale * ave_occ,
ave_occ_house = house_scale * ave_occ,
pop_calc = (ave_occ_flat * flats_ad_count) + (ave_occ_house * houses_ad_count),
pop_diff = population - pop_calc)
Joining, by = "msoa_code"
# distribution of the gap between reconstructed and observed populations
ggplot(people_wo_gar, aes(x = pop_diff)) +
  geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 1282 rows containing non-finite values (stat_bin).
NA
It may be more accurate to just use average occupancy rates for each MSOA.
# simpler estimate: a single average occupancy rate per MSOA
# (overwrites the dwelling-type-split estimate above)
people_wo_gar <- gardens %>%
left_join(msoa_pops) %>%
# assumes perc_of_ades_with_gar is a proportion in [0, 1], not 0-100 --
# TODO confirm against the ONS source table
mutate(ad_wo_gar = ad_count * (1 - perc_of_ades_with_gar),
ave_occ = population / ad_count,
people_wo_gar = round(ad_wo_gar * ave_occ)) %>%
select(country_code:msoa_name, people_wo_gar)
Joining, by = "msoa_code"
# inspect the result and its distribution
people_wo_gar

ggplot(people_wo_gar, aes(x = people_wo_gar)) +
  geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 1282 rows containing non-finite values (stat_bin).
https://www.robert-hickman.eu/post/getis-ord-heatmaps-tutorial/
library(sf)
Warning: package ‘sf’ was built under R version 4.0.5
Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
# focus on England and Wales
# (as the shapefile for MSOAs only includes England and Wales)
people_wo_gar_EW <- people_wo_gar %>%
  filter(country_name %in% c("England", "Wales"))

# median across MSOAs, used below to impute the one MSOA with a missing
# value (`<-`, not `=`, for top-level assignment, per R convention)
median_msoa_peop_wo_gar <- median(people_wo_gar_EW$people_wo_gar, na.rm = TRUE)
# read in MSOA boundary shapefile (December 2011 boundaries),
# renaming the key columns to match the garden data
msoa_bound <- st_read("data/MSOA_bound/Middle_Layer_Super_Output_Areas_(December_2011)_Boundaries.shp") %>%
select(msoa_code = msoa11cd,
msoa_name = msoa11nm)
Reading layer `Middle_Layer_Super_Output_Areas_(December_2011)_Boundaries' from data source `C:\Users\chris\Desktop\Data Analysis Projects\green_space\data\MSOA_bound\Middle_Layer_Super_Output_Areas_(December_2011)_Boundaries.shp' using driver `ESRI Shapefile'
Simple feature collection with 7201 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 82678 ymin: 5343 xmax: 655604.7 ymax: 657534.1
Projected CRS: OSGB 1936 / British National Grid
# countries boundary shapefile, used later to clip the hexagonal grid
country_bound <- st_read("data/Countries_bound/Countries_(December_2017)_Boundaries.shp") %>%
  filter(ctry17nm %in% c("England", "Wales"))
Reading layer `Countries_(December_2017)_Boundaries' from data source `C:\Users\chris\Desktop\Data Analysis Projects\green_space\data\Countries_bound\Countries_(December_2017)_Boundaries.shp' using driver `ESRI Shapefile'
Simple feature collection with 3 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 5512.999 ymin: 5351.297 xmax: 655644.8 ymax: 1220302
Projected CRS: OSGB 1936 / British National Grid
# join the garden data onto the MSOA boundaries; join keys made explicit
# (these match the keys left_join() reported at run time)
people_wo_gar_spatial <- msoa_bound %>%
  left_join(people_wo_gar_EW, by = c("msoa_code", "msoa_name")) %>%
  # median imputation for the one MSOA with a missing value
  mutate(people_wo_gar = replace_na(people_wo_gar, median_msoa_peop_wo_gar))
Joining, by = c("msoa_code", "msoa_name")
# ###########################
# simulate the locations of people without a garden
# ###########################
# random locations within an MSOA was too computationally intensive
# so I went with placing the 'people' at MSOA centroids
# (each point stands for ~10 people: counts are divided by 10 before uncount)
people <- people_wo_gar_spatial %>%
mutate(people_wo_gar = round(people_wo_gar / 10),
geometry = st_centroid(geometry)) %>%
uncount(people_wo_gar)
# sanity check: total people without a garden (before the divide-by-10)
sum(people_wo_gar_spatial$people_wo_gar)
[1] 6702350
# create boundary file (summarise() with no groups unions all MSOA polygons)
map_bound <- people_wo_gar_spatial %>%
summarise()# %>%
#st_transform(4326)
# alternative: sample random points within each MSOA (too slow; kept for reference)
# people <- st_sample(
# select(people_wo_gar_spatial, -people_wo_gar),
# size = round(people_wo_gar_spatial$people_wo_gar / 100))
# ###########################
# create hexagonal grid
# ###########################
# 7500 m cell size (British National Grid units); the crs argument is
# presumably redundant since st_make_grid defaults to st_crs(x) -- harmless
hex_polygons <- st_make_grid(map_bound, 7500,
crs = st_crs(people_wo_gar_spatial),
what = "polygons",
square = FALSE) %>%
st_sf()
# calculate number of people (simulated points) falling in each hexagon
intersects <- st_intersects(hex_polygons, people)
hex_polygons$people_wo_gar <- lengths(intersects)
# ###########################
# create the plot
# ###########################
# earlier static attempt -- kept for reference
# ggplot(people_wo_gar_spatial) +
# #geom_sf(data = map_bound, colour = "black", size = 1) +
# geom_sf(data = hex_polygons, fill = "black") + # , aes(fill = people_wo_gar)
# scale_fill_viridis_c(trans = "log", direction = -1) +
# #scale_color_viridis_c(trans = "log") +
# ggthemes::theme_map()
# crop hexagons to the outline of England and Wales
# (sf `[` subsetting by an sf object keeps features that intersect it)
hex_polygons_EW <- hex_polygons[country_bound, ]
# hex_polygon_EW %>%
# count(people_wo_gar)
# map the people-without-garden counts on the hexagonal grid
ggplot() +
  #geom_sf(data = map_bound) +
  #geom_sf(data = st_centroid(msoa_bound), colour = "grey") +
  geom_sf(data = hex_polygons_EW, mapping = aes(fill = people_wo_gar), alpha = 0.5) +
  scale_fill_viridis_c(trans = "log", direction = -1) +
  theme_void()
Warning: Transformation introduced infinite values in discrete y-axis
# hex_polygons %>%
# filter(people_wo_gar == 0)
#
# people_wo_gar_spatial %>%
# filter(people_wo_gar == 0)
# ###########################
# Smooth
# ###########################
https://pudding.cool/process/regional_smoothing/
# save the most recent plot to disk (defaults to 7 x 7 in)
ggsave("garden_plot.svg")
Saving 7 x 7 in image
Don't know how to automatically pick scale for object of type localG. Defaulting to continuous.
Looking at change in park usage (in two periods in 2020) vs garden access.
Usage data from Google via ONS
# read in Google/ONS park usage data (percent change vs pre-pandemic baseline)
park_useage <- read_xlsx("data/change_in_parks_visits_2020_LA.xlsx", skip = 6) %>%
# apply consistent naming style
janitor::clean_names() %>%
# shorten names for readability
rename(perc_ch_parks_spring_lock = percent_change_in_visits_to_and_time_spent_in_parks_during_spring_2020_lockdown,
perc_ch_park_summer = percent_change_in_visits_to_and_time_spent_in_parks_during_july_and_august_2020,
lad_code = area_codes,
lad_name = area_names)
# display for a quick visual check
park_useage
# read in urban / rural classification (2001, one row per MSOA);
# keep the LA keys plus the settlement morphology class
urb_rur_class <- read_csv("data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv") %>%
clean_names() %>%
select(lad_code = lad01cd, lad_name = lad01nm,
class_name = morphology_name,
class_code = morphology_code)
-- Column specification ----------------------------------------------------------------------------------------
cols(
GOR01CD = col_character(),
GOR01NM = col_character(),
CTY01CD = col_logical(),
CTY01NM = col_logical(),
LAD01CD = col_character(),
LAD01NM = col_character(),
MSOA01CD = col_character(),
MSOA01NM = col_character(),
RUC01NM = col_character(),
RUC01CD = col_double(),
`Morphology Name` = col_character(),
`Morphology Code` = col_double(),
`Context Name` = col_character(),
`Context Code` = col_double()
)
Warning: 6366 parsing failures.
row col expected actual file
3599 CTY01CD 1/0/T/F/TRUE/FALSE 09 'data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
3599 CTY01NM 1/0/T/F/TRUE/FALSE Bedfordshire 'data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
3600 CTY01CD 1/0/T/F/TRUE/FALSE 09 'data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
3600 CTY01NM 1/0/T/F/TRUE/FALSE Bedfordshire 'data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
3601 CTY01CD 1/0/T/F/TRUE/FALSE 09 'data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
.... ....... .................. ............ ...................................................................................... [... truncated]
# process urban / rural classification: per LAD, the share of its MSOAs
# classed as urban, plus a binary "mostly urban" flag
urb_rur_LAD <- urb_rur_class %>%
# assumes morphology code 1 == "urban" -- TODO confirm against the
# classification codebook
mutate(is_urban = class_code == 1) %>%
group_by(lad_name) %>%
summarise(n_tot = n(),
n_urban = sum(is_urban),
prop_urban = n_urban / n_tot) %>%
# an LAD counts as urban if more than 75% of its MSOAs are urban
mutate(is_urban_LAD = prop_urban > 0.75)
# local authority district boundaries (December 2019, full-resolution clipped),
# renamed to the lad_code / lad_name convention used elsewhere
LA_bounds <- st_read("data/LA_bound/Local_Authority_Districts_(December_2019)_Boundaries_UK_BFC.shp") %>%
  rename(lad_code = lad19cd, lad_name = lad19nm)
Reading layer `Local_Authority_Districts_(December_2019)_Boundaries_UK_BFC' from data source `C:\Users\chris\Desktop\Data Analysis Projects\green_space\data\LA_bound\Local_Authority_Districts_(December_2019)_Boundaries_UK_BFC.shp' using driver `ESRI Shapefile'
Simple feature collection with 382 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -116.1928 ymin: 5337.901 xmax: 655653.8 ymax: 1220302
Projected CRS: OSGB 1936 / British National Grid
LA_bounds
Simple feature collection with 382 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -116.1928 ymin: 5337.901 xmax: 655653.8 ymax: 1220302
Projected CRS: OSGB 1936 / British National Grid
First 10 features:
objectid lad_code lad_name lad19nmw bng_e bng_n long lat st_areasha
1 1 E06000001 Hartlepool <NA> 447160 531474 -1.27018 54.67614 93712620
2 2 E06000002 Middlesbrough <NA> 451141 516887 -1.21099 54.54467 53881564
3 3 E06000003 Redcar and Cleveland <NA> 464361 519597 -1.00608 54.56752 245069509
4 4 E06000004 Stockton-on-Tees <NA> 444940 518183 -1.30664 54.55691 204932954
5 5 E06000005 Darlington <NA> 428029 515648 -1.56835 54.53534 197475689
6 6 E06000006 Halton <NA> 354246 382146 -2.68853 53.33424 79084035
7 7 E06000007 Warrington <NA> 362744 388456 -2.56167 53.39163 180627984
8 8 E06000008 Blackburn with Darwen <NA> 369490 422806 -2.46360 53.70080 137022080
9 9 E06000009 Blackpool <NA> 332819 436635 -3.02199 53.82164 34870886
10 10 E06000010 Kingston upon Hull, City of <NA> 511894 431650 -0.30382 53.76920 71583612
st_lengths geometry
1 71011.93 MULTIPOLYGON (((447213.9 53...
2 44481.69 MULTIPOLYGON (((448609.9 52...
3 96703.99 MULTIPOLYGON (((455932.3 52...
4 123408.99 MULTIPOLYGON (((444157 5279...
5 107206.40 MULTIPOLYGON (((423496.6 52...
6 77771.10 MULTIPOLYGON (((358374.7 38...
7 114690.86 MULTIPOLYGON (((367308.2 39...
8 65284.97 MULTIPOLYGON (((369226.3 43...
9 34483.49 MULTIPOLYGON (((332985.7 44...
10 64681.12 MULTIPOLYGON (((510966.6 43...
# aggregate gardens data from MSOA to LAD scale
# (`perc_of_ades_with_gar` is the proportion-with-garden column whose name
# was mangled by the renaming pipeline above)
gardens_LAD <- gardens %>%
  mutate(ad_wo_gar = round(ad_count * (1 - perc_of_ades_with_gar))) %>%
  group_by(lad_code, lad_name) %>%
  # .groups = "drop" so no grouping lingers on the result (the original
  # left the data grouped by lad_code, which can silently affect later
  # grouped operations)
  summarise(ad_count = sum(ad_count),
            ad_wo_gar = sum(ad_wo_gar),
            prop_ad_wo_gar = ad_wo_gar / ad_count,
            .groups = "drop")
`summarise()` has grouped output by 'lad_code'. You can override using the `.groups` argument.
# join data sets; join keys made explicit rather than relying on
# left_join()'s natural-join guess (matches the keys reported at run time)
gar_park_use <- park_useage %>%
  left_join(gardens_LAD, by = c("lad_code", "lad_name")) %>%
  left_join(urb_rur_LAD, by = "lad_name")
Joining, by = c("lad_code", "lad_name")
Joining, by = "lad_name"
# check for NA created during joining (skim reports per-column missingness)
gar_park_use %>%
skimr::skim()
-- Data Summary ------------------------
Values
Name Piped data
Number of rows 354
Number of columns 11
_______________________
Column type frequency:
character 3
logical 1
numeric 7
________________________
Group variables None
-- Variable type: character ------------------------------------------------------------------------------------
# A tibble: 3 x 8
skim_variable n_missing complete_rate min max empty n_unique whitespace
* <chr> <int> <dbl> <int> <int> <int> <int> <int>
1 lad_code 3 0.992 9 52 0 351 0
2 lad_name 2 0.994 4 35 0 352 0
3 perc_ch_park_summer 2 0.994 1 7 0 336 0
-- Variable type: logical --------------------------------------------------------------------------------------
# A tibble: 1 x 5
skim_variable n_missing complete_rate mean count
* <chr> <int> <dbl> <dbl> <chr>
1 is_urban_LAD 42 0.881 0.583 TRU: 182, FAL: 130
-- Variable type: numeric --------------------------------------------------------------------------------------
# A tibble: 7 x 11
skim_variable n_missing complete_rate mean sd p0 p25 p50
* <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 perc_ch_parks_spring_lock 2 0.994 13.0 29.3 -82.1 -5.85 13.0
2 ad_count 4 0.989 79260. 52299. 7707 45916. 63185
3 ad_wo_gar 4 0.989 9292. 9560. 1748 3984. 6249
4 prop_ad_wo_gar 4 0.989 0.110 0.0658 0.0349 0.0787 0.0979
5 n_tot 42 0.881 20.6 13.3 1 13 17
6 n_urban 42 0.881 17.0 14.5 0 8 13
7 prop_urban 42 0.881 0.754 0.268 0 0.587 0.845
p75 p100 hist
* <dbl> <dbl> <chr>
1 29.9 139. ▁▆▇▁▁
2 98078. 447437 ▇▂▁▁▁
3 10019 62972 ▇▁▁▁▁
4 0.122 0.928 ▇▁▁▁▁
5 25 131 ▇▂▁▁▁
6 23 131 ▇▂▁▁▁
7 1 1 ▁▂▂▃▇
# which LADs ended up without an urban/rural classification?
filter(gar_park_use, is.na(is_urban_LAD))

# garden access vs spring-lockdown park-use change, split urban / rural
ggplot(gar_park_use, aes(prop_ad_wo_gar, perc_ch_parks_spring_lock,
                         colour = is_urban_LAD)) +
  geom_point() +
  geom_smooth(method = "lm") +
  #scale_x_log10() +
  xlim(c(0, 0.5)) +
  facet_wrap(~is_urban_LAD)
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 5 rows containing non-finite values (stat_smooth).
Warning: Removed 5 rows containing missing values (geom_point).
# summer park-use change vs garden access
ggplot(gar_park_use, aes(x = prop_ad_wo_gar, y = perc_ch_park_summer)) +
  geom_point()
Warning: Removed 4 rows containing missing values (geom_point).
# Bayesian linear model: spring-lockdown park-use change ~ garden access,
# urban LADs only, excluding the extreme garden-access tail (>= 0.2)
mod_1 <- rstanarm::stan_glm(perc_ch_parks_spring_lock ~ prop_ad_wo_gar,
data = filter(gar_park_use,
prop_ad_wo_gar < 0.2 &
is_urban_LAD == TRUE),
refresh = 0)
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
# posterior median Bayesian R-squared for mod_1
median(rstanarm::bayes_R2(mod_1))
[1] 0.1445503
# Friends of the Earth green space data (England only);
# keep population, income, and green-space access measures
foe_green_space <- read_xlsx("data/(FOE) Green Space Consolidated Data - England - Version 2.1.xlsx",
sheet = "Local Authorities V2.1") %>%
clean_names() %>%
select(lad_code = la_code,
lad_name = la_name,
pop = total_pop_from_ethnicity_data,
bame_pop,
income_index,
pcnt_pop_with_go_space_access,
green_space_area_per_capita)
New names:
* `` -> ...1
# join FOE data to the existing data; join keys made explicit
# (matches the keys left_join() reported at run time)
gar_park_use_foe <- gar_park_use %>%
  left_join(foe_green_space, by = c("lad_code", "lad_name"))
Joining, by = c("lad_code", "lad_name")
# check NAs created by joining (FOE data is England-only, so Scottish and
# Welsh LADs gain NAs here)
skimr::skim(gar_park_use_foe)
-- Data Summary ------------------------
Values
Name gar_park_use_foe
Number of rows 354
Number of columns 16
_______________________
Column type frequency:
character 3
logical 1
numeric 12
________________________
Group variables None
-- Variable type: character ------------------------------------------------------------------------------------
# A tibble: 3 x 8
skim_variable n_missing complete_rate min max empty n_unique whitespace
* <chr> <int> <dbl> <int> <int> <int> <int> <int>
1 lad_code 3 0.992 9 52 0 351 0
2 lad_name 2 0.994 4 35 0 352 0
3 perc_ch_park_summer 2 0.994 1 7 0 336 0
-- Variable type: logical --------------------------------------------------------------------------------------
# A tibble: 1 x 5
skim_variable n_missing complete_rate mean count
* <chr> <int> <dbl> <dbl> <chr>
1 is_urban_LAD 42 0.881 0.583 TRU: 182, FAL: 130
-- Variable type: numeric --------------------------------------------------------------------------------------
# A tibble: 12 x 11
skim_variable n_missing complete_rate mean sd p0 p25 p50
* <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 perc_ch_parks_spring_lock 2 0.994 13.0 29.3 -82.1 -5.85 13.0
2 ad_count 4 0.989 79260. 52299. 7707 45916. 63185
3 ad_wo_gar 4 0.989 9292. 9560. 1748 3984. 6249
4 prop_ad_wo_gar 4 0.989 0.110 0.0658 0.0349 0.0787 0.0979
5 n_tot 42 0.881 20.6 13.3 1 13 17
6 n_urban 42 0.881 17.0 14.5 0 8 13
7 prop_urban 42 0.881 0.754 0.268 0 0.587 0.845
8 pop 52 0.853 169400. 114457. 7375 97389. 132484.
9 bame_pop 52 0.853 25213. 46089. 565 3308. 7918
10 income_index 52 0.853 5.81 1.46 2.79 4.67 5.81
11 pcnt_pop_with_go_space_access 52 0.853 33.0 11.7 7.48 24.2 33.8
12 green_space_area_per_capita 52 0.853 348. 1405. 2.36 25.5 46.7
p75 p100 hist
* <dbl> <dbl> <chr>
1 29.9 139. ▁▆▇▁▁
2 98078. 447437 ▇▂▁▁▁
3 10019 62972 ▇▁▁▁▁
4 0.122 0.928 ▇▁▁▁▁
5 25 131 ▇▂▁▁▁
6 23 131 ▇▂▁▁▁
7 1 1 ▁▂▂▃▇
8 211063. 1073045 ▇▂▁▁▁
9 21377. 451409 ▇▁▁▁▁
10 7.02 9.01 ▃▆▇▆▃
11 41.0 75.2 ▃▇▇▂▁
12 111. 16646. ▇▁▁▁▁
gar_park_use_foe %>%
filter(is.na(pop)) # almost all Scotland and Wales
# (as expected as FOE is England only)
# England-only subset; convert the summer change column to numeric
# ("No data" entries are turned into NA first to avoid coercion warnings)
gar_park_use_foe_eng <- gar_park_use_foe %>%
filter(!is.na(pop)) %>%
mutate(perc_ch_park_summer = na_if(perc_ch_park_summer, "No data"),
perc_ch_park_summer = as.numeric(perc_ch_park_summer),
prop_bame_pop = bame_pop / pop)
# display for a quick visual check
gar_park_use_foe_eng
# model input: drop identifiers, raw counts, and unused/duplicated columns
gar_park_use_foe_eng_mod_in <- gar_park_use_foe_eng %>%
select(-lad_code, -lad_name, -pop, -bame_pop,
-n_tot, -n_urban, -is_urban_LAD, -ad_count,-ad_wo_gar,
-perc_ch_park_summer, -pcnt_pop_with_go_space_access, -green_space_area_per_capita)
# Bayesian linear model on all remaining predictors (dot formula)
mod_2 <- rstanarm::stan_glm(perc_ch_parks_spring_lock ~ .,
data = gar_park_use_foe_eng_mod_in,
refresh = 0)
# posterior median Bayesian R-squared for mod_2
median(rstanarm::bayes_R2(mod_2))
[1] 0.1692073
# explicit-predictor version of mod_2; its R-squared matches mod_2's,
# suggesting these four predictors carry all of the dot formula's signal
mod_3 <- rstanarm::stan_glm(perc_ch_parks_spring_lock ~ prop_ad_wo_gar + prop_urban + income_index + prop_bame_pop,
data = gar_park_use_foe_eng_mod_in,
refresh = 0)
# posterior median Bayesian R-squared for mod_3
median(rstanarm::bayes_R2(mod_3))
[1] 0.1692193
# BAME population share vs spring-lockdown park-use change
ggplot(gar_park_use_foe_eng_mod_in,
       aes(x = prop_bame_pop, y = perc_ch_parks_spring_lock)) +
  geom_point()
# interactive scatter for highly urban LADs (prop_urban > 0.8):
# garden access vs spring change, coloured by BAME share,
# sized by green space per capita
p <- ggplot(data = filter(gar_park_use_foe_eng, prop_urban > 0.8),
            mapping = aes(prop_ad_wo_gar, perc_ch_parks_spring_lock,
                          colour = prop_bame_pop,
                          size = green_space_area_per_capita,
                          label = lad_name)) +
  geom_point(alpha = 0.5) +
  scale_colour_viridis_c() +
  xlim(c(0, .5))
#scale_x_log10()

plotly::ggplotly(p)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
# garden access vs BAME share among highly urban LADs
p2 <- ggplot(filter(gar_park_use_foe_eng, prop_urban > 0.8),
             aes(x = prop_bame_pop, y = prop_ad_wo_gar)) +
  geom_point() +
  ylim(c(0, 0.5))
p2
Warning: Removed 1 rows containing missing values (geom_point).
# BAME share vs spring change for highly urban LADs,
# coloured by garden access (log scale, reversed viridis)
p3 <- ggplot(data = filter(gar_park_use_foe_eng, prop_urban > 0.8),
             mapping = aes(prop_bame_pop, perc_ch_parks_spring_lock,
                           colour = prop_ad_wo_gar,
                           size = green_space_area_per_capita,
                           label = lad_name)) +
  geom_point(alpha = 0.5) +
  scale_colour_viridis_c(direction = -1, trans = "log", end = 0.9) +
  xlim(c(0, .5))
p3
Warning: Removed 7 rows containing missing values (geom_point).
# interactive version of p3
plotly::ggplotly(p3)
# spring vs summer park-use change, highly urban LADs only
p4 <- ggplot(data = filter(gar_park_use_foe_eng, prop_urban > 0.8),
             mapping = aes(perc_ch_parks_spring_lock, perc_ch_park_summer,
                           colour = prop_ad_wo_gar,
                           size = green_space_area_per_capita,
                           label = lad_name)) +
  geom_point(alpha = 0.5) +
  scale_colour_viridis_c(direction = -1, trans = "log", end = 0.9)
p4
Warning: Removed 7 rows containing missing values (geom_point).
# interactive version of p4
plotly::ggplotly(p4)
Major cities
# lookup table mapping each LA to its region (taken from the gardens data)
la_region_lookup <- gardens %>%
  select(region_code:lad_name) %>%
  distinct()

# London boroughs; join keys made explicit (matches the keys reported
# at run time)
london <- gar_park_use_foe_eng %>%
  left_join(la_region_lookup, by = c("lad_code", "lad_name")) %>%
  filter(region_name == "London")
Joining, by = c("lad_code", "lad_name")
# the biggest (by population) highly urban LADs outside London;
# join keys made explicit (matches the keys reported at run time)
other_big_cities <- gar_park_use_foe_eng %>%
  left_join(la_region_lookup, by = c("lad_code", "lad_name")) %>%
  filter(region_name != "London") %>%
  filter(prop_urban >= 0.8) %>%
  slice_max(order_by = pop, n = 29)
Joining, by = c("lad_code", "lad_name")
# combine London boroughs with the other big cities; bin green space per
# capita into four bands and flip garden access to "with garden"
thirty_biggest_cities <- london %>%
bind_rows(other_big_cities) %>%
mutate(is_london = region_name == "London",
gs_pc_bin = cut(green_space_area_per_capita,
breaks = c(0, 10, 25, 50, Inf)),
prop_ad_with_gar = 1 - prop_ad_wo_gar)
# one colour per gs_pc_bin band (in level order): dark -> green as
# green space per capita increases
colours <- c("grey30", "grey70", "palegreen", "palegreen4")
# garden access vs spring change for the 30 biggest cities,
# London vs non-London side by side
p <- ggplot(thirty_biggest_cities,
aes(prop_ad_with_gar, perc_ch_parks_spring_lock,
colour = gs_pc_bin,
size = gs_pc_bin,
label = lad_name)) +
geom_point(alpha = 0.5) +
scale_colour_manual(values = colours) +
#scale_colour_viridis_c() +
#scale_colour_gradient(low = "grey30", high = "green") +
xlim(c(0.5,1)) +
facet_wrap(~is_london)
#scale_x_log10()
# NOTE(review): mapping size to the discrete gs_pc_bin triggers a ggplot
# warning -- consider sizing by the continuous variable instead
plotly::ggplotly(p)
Warning: Using size for a discrete variable is not advised.
# distribution of green space per capita among the 30 biggest cities
ggplot(thirty_biggest_cities, aes(x = green_space_area_per_capita)) +
  geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# spring vs summer park-use change for the 30 biggest cities,
# London vs non-London side by side
p4 <- ggplot(data = thirty_biggest_cities,
             mapping = aes(perc_ch_parks_spring_lock, perc_ch_park_summer,
                           colour = green_space_area_per_capita,
                           size = green_space_area_per_capita,
                           label = lad_name)) +
  geom_point(alpha = 0.5) +
  scale_colour_viridis_c(end = 0.9) +
  facet_wrap(~is_london)
p4
plotly::ggplotly(p4)
# display the LA boundaries object for reference
LA_bounds
Simple feature collection with 382 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -116.1928 ymin: 5337.901 xmax: 655653.8 ymax: 1220302
Projected CRS: OSGB 1936 / British National Grid
First 10 features:
objectid lad19cd lad19nm lad19nmw bng_e bng_n long lat st_areasha
1 1 E06000001 Hartlepool <NA> 447160 531474 -1.27018 54.67614 93712620
2 2 E06000002 Middlesbrough <NA> 451141 516887 -1.21099 54.54467 53881564
3 3 E06000003 Redcar and Cleveland <NA> 464361 519597 -1.00608 54.56752 245069509
4 4 E06000004 Stockton-on-Tees <NA> 444940 518183 -1.30664 54.55691 204932954
5 5 E06000005 Darlington <NA> 428029 515648 -1.56835 54.53534 197475689
6 6 E06000006 Halton <NA> 354246 382146 -2.68853 53.33424 79084035
7 7 E06000007 Warrington <NA> 362744 388456 -2.56167 53.39163 180627984
8 8 E06000008 Blackburn with Darwen <NA> 369490 422806 -2.46360 53.70080 137022080
9 9 E06000009 Blackpool <NA> 332819 436635 -3.02199 53.82164 34870886
10 10 E06000010 Kingston upon Hull, City of <NA> 511894 431650 -0.30382 53.76920 71583612
st_lengths geometry
1 71011.93 MULTIPOLYGON (((447213.9 53...
2 44481.69 MULTIPOLYGON (((448609.9 52...
3 96703.99 MULTIPOLYGON (((455932.3 52...
4 123408.99 MULTIPOLYGON (((444157 5279...
5 107206.40 MULTIPOLYGON (((423496.6 52...
6 77771.10 MULTIPOLYGON (((358374.7 38...
7 114690.86 MULTIPOLYGON (((367308.2 39...
8 65284.97 MULTIPOLYGON (((369226.3 43...
9 34483.49 MULTIPOLYGON (((332985.7 44...
10 64681.12 MULTIPOLYGON (((510966.6 43...